Nongenetic inhertance map of systematic reviews

knitr::opts_chunk$set(echo = TRUE)

library(readxl)
library(plyr)
library(tibble)
library(dplyr)
library(tidyverse)
library(stringr)
library(knitr)
library(forcats)
library(ggplot2)
library(hrbrthemes)  #for ggplot2
library(bibliometrix)
library(igraph)

Data loading

# manually extracted data
xldata <- "./Data_extraction_postcrosschecking.xlsx"

# bibliometric data
bib <- convert2df("./bibliometric.bib", dbsource = "scopus", format = "bibtex")

Data organisation

Splitting list of tabs into separate dataframes

excel_sheets(path = xldata)
tab_names <- excel_sheets(path = xldata)


# creating a list of dataframes per tab
list_all <- lapply(tab_names, function(x) read_excel(path = xldata, sheet = x))

# assigning tab names to each dataframe
names(list_all) <- tab_names

# get dataframes out of list
list2env(list_all, .GlobalEnv)

Addressing objectives 1 and 2

  1. Map the SR literature across disciplines e.g., the types of environmental exposures and traits synthesised, the proportion of SRs that examine inter- versus trans-generational effects, and which disciplines dominate the non-genetic inheritance SR literature. The map will highlight gaps in the literature that remain to be synthesised or have very few SRs.

  2. Present discipline-specific research patterns by summarising commonalities and disparities between disciplines (e.g., do SRs of specific environmental exposures dominate one discipline and not others? Do some disciplines focus on inter-generational effects, and another on trans-generational effects?).

Percent of SR’s within disciplines

count_discipline <- Review_info %>%
    count(discipline_code) %>%
    arrange(desc(n))
percent_discipline <- count_discipline %>%
    mutate(percent = (n/sum(n)) * 100)
percent_discipline$discipline_code <- factor(percent_discipline$discipline_code,
    level = percent_discipline$discipline_code[order(percent_discipline$n, decreasing = FALSE)])

ggplot(percent_discipline, aes(x = discipline_code, y = percent)) + geom_col(aes(fill = ""),
    width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
    xlab("Discipline") + scale_fill_manual(values = c("#919191")) + theme(legend.position = "none",
    axis.title.x = element_text(size = 10))

Inter- vs trans-generational effects within and between disciplines

Merged_inter_vs_trans <- merge(Inter_vs_trans_info, Review_info)

count_inter_vs_trans <- Merged_inter_vs_trans %>%
    count(inter_vs_trans_code, by = discipline_code) %>%
    arrange(desc(n))
percent_inter_vs_trans <- count_inter_vs_trans %>%
    mutate(percent = (n/sum(n)) * 100)

percent_inter_vs_trans <- percent_inter_vs_trans %>%
    rename(discipline_code = by)

ggplot(percent_inter_vs_trans, aes(x = inter_vs_trans_code, y = percent)) + geom_col(aes(fill = discipline_code),
    width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
    theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
    guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used and a description of what the controlled vocabulary means.

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.

Inter_vs_trans_info_unique <- Inter_vs_trans_code_info %>%
    select(controlled_vocab_inter_vs_trans, description)
unique(Inter_vs_trans_info_unique)
## # A tibble: 3 x 2
##   controlled_vocab_inter_vs_trans description       
##   <chr>                           <chr>             
## 1 inter                           inter-generational
## 2 trans                           trans-generational
## 3 unclear                         unclear

Descendant generations within and between disciplines

merged_descendant_generat <- merge(Descendant_generat_info, Review_info)

count_descendant_generat <- merged_descendant_generat %>%
    count(descendant_generat_code, by = discipline_code) %>%
    arrange(desc(n))
percent_descendant_generat <- count_descendant_generat %>%
    mutate(percent = (n/sum(n)) * 100)

percent_descendant_generat <- percent_descendant_generat %>%
    rename(discipline_code = by)

ggplot(percent_descendant_generat, aes(x = descendant_generat_code, y = percent)) +
    geom_col(aes(fill = discipline_code), width = 0.7) + theme_light() + coord_flip() +
    scale_y_continuous(name = "Percent") + theme(legend.position = "bottom", axis.title.x = element_text(size = 10),
    axis.title.y = element_blank()) + guides(fill = guide_legend(title = "Discipline:"))

Terminology used within and between disciplines

I.e., does the use of inter- and trans-generational inheritance match our definitions (see Fig. 1)

count_terminology <- Review_info %>%
    count(terminology_code, by = discipline_code) %>%
    arrange(desc(n))
percent_terminology <- count_terminology %>%
    mutate(percent = (n/sum(n)) * 100)

percent_terminology <- percent_terminology %>%
    rename(discipline_code = by)

ggplot(percent_terminology, aes(x = terminology_code, y = percent)) + geom_col(aes(fill = discipline_code),
    width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
    theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
    guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used and a description of what the controlled vocabulary means.

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.

terminology_code_unique <- Terminology_code_info %>%
    select(controlled_vocab_terminology, description_terminology)
unique(terminology_code_unique)
## # A tibble: 4 x 2
##   controlled_vocab_terminology description_terminology 
##   <chr>                        <chr>                   
## 1 yes                          yes                     
## 2 no                           no                      
## 3 not used                     does not use these terms
## 4 unclear                      unclear

Types of non-genetic transmission within and between disciplines

I.e., are the non-genetic effects conferred through the matriline or patriline etc.

Merged_transmission_info <- merge(Transmission_info, Review_info)

count_transmission <- Merged_transmission_info %>%
    count(transmission_code, by = discipline_code) %>%
    arrange(desc(n))
percent_transmission <- count_transmission %>%
    mutate(percent = (n/sum(n)) * 100)

percent_transmission <- percent_transmission %>%
    rename(discipline_code = by)

ggplot(percent_transmission, aes(x = transmission_code, y = percent)) + geom_col(aes(fill = discipline_code),
    width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
    theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
    guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used and a description of what the controlled vocabulary means.

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.

Transmission_info_unique <- Transmission_code_info %>%
    select(controlled_vocab_transmission, description)
unique(Transmission_info_unique)
## # A tibble: 4 x 2
##   controlled_vocab_transmi… description                                         
##   <chr>                     <chr>                                               
## 1 matriline                 matriline                                           
## 2 patriline                 patriline                                           
## 3 not separated             not separated (i.e., the primary studies used do no…
## 4 unclear                   unclear/unspecefied

F0 environmental manipulations within and between disciplines

merged_F0_env <- merge(F0_env_info, Review_info)

count_F0_env <- merged_F0_env %>%
    count(F0_env_code, by = discipline_code) %>%
    arrange(desc(n))
percent_F0_env <- count_F0_env %>%
    mutate(percent = (n/sum(n)) * 100)

percent_F0_env <- percent_F0_env %>%
    rename(discipline_code = by)

ggplot(percent_F0_env, aes(x = F0_env_code, y = percent)) + geom_col(aes(fill = discipline_code),
    width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
    theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
    guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used and a description of what the controlled vocabulary means.

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.

F0_env_info_unique <- F0_env_code_info %>%
    select(controlled_vocab_F0_env, description)
unique(F0_env_info_unique)
## # A tibble: 9 x 2
##   controlled_vocab_F0_e… description                                            
##   <chr>                  <chr>                                                  
## 1 diet                   diet                                                   
## 2 human induced          human induced pollutant/toxin                          
## 3 environmental composi… natural variation in environmental composition (e.g., …
## 4 psychological stress   psychological stress (e.g., post-natal separation)     
## 5 temperature            temperature                                            
## 6 human health           ‘human health’ related environments (e.g., tobacco, al…
## 7 population demographic differences in population demographics (e.g., populati…
## 8 light                  light and/or photoperiod                               
## 9 other                  other/unclear

Environmental effect direction within and between disciplines

I.e., are the effects of environment predicted to have negative, positive, or neutral effects on offspring phenotype

merged_env_eff <- merge(Env_eff_diection_info, Review_info)

count_env_eff <- merged_env_eff %>%
    count(env_eff_direction_code, by = discipline_code) %>%
    arrange(desc(n))
percent_env_eff <- count_env_eff %>%
    mutate(percent = (n/sum(n)) * 100)

percent_env_eff <- percent_env_eff %>%
    rename(discipline_code = by)

ggplot(percent_env_eff, aes(x = env_eff_direction_code, y = percent)) + geom_col(aes(fill = discipline_code),
    width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
    theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
    guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used and a description of what the controlled vocabulary means.

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.

Env_eff_direction_info_unique <- Env_eff_direction_code_info %>%
    select(controlled_vocab_env_eff_direction, description)
unique(Env_eff_direction_info_unique)
## # A tibble: 3 x 2
##   controlled_vocab_env_eff_direction description
##   <chr>                              <chr>      
## 1 negative                           negative   
## 2 positive                           positive   
## 3 unclear                            unclear

F0 environmental exposure timing within and between disciplines

Note: We excluded SRs that solely focused on environmental exposures that occured when the F0 generation was a fetus (i.e., pre-natal). However, some broader SRs (i.e., eco evo SRs) included primary studies where the F0 generation was exposed pre-natally. This was therefore coded in our data.

merged_exposure_timing <- merge(Exposure_timing_info, Review_info)

count_exposure_timing <- merged_exposure_timing %>%
    count(exposure_timing_code, by = discipline_code) %>%
    arrange(desc(n))
percent_exposure_timing <- count_exposure_timing %>%
    mutate(percent = (n/sum(n)) * 100)

percent_exposure_timing <- percent_exposure_timing %>%
    rename(discipline_code = by)

ggplot(percent_exposure_timing, aes(x = exposure_timing_code, y = percent)) + geom_col(aes(fill = discipline_code),
    width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
    theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
    guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used, and a description of what the controlled vocabulary means.

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.

Exposure_timing_info_unique <- Exposure_timing_code_info %>%
    select(controlled_vocab_exposure_timing, description)
unique(Exposure_timing_info_unique)
## # A tibble: 5 x 2
##   controlled_vocab_exposure_timing description                               
##   <chr>                            <chr>                                     
## 1 pre-natal                        pre-natally                               
## 2 post-natal                       post-natal before sexual maturity         
## 3 post-sexual maturity             after sexual maturity but before gestation
## 4 gestation                        during gestation                          
## 5 unclear                          other/unclear

Descendant traits within and between disciplines

merged_descendant_trait <- merge(Descendant_trait_info, Review_info)

count_descendant_trait <- merged_descendant_trait %>%
    count(descendant_trait_code, by = discipline_code) %>%
    arrange(desc(n))
percent_descendant_trait <- count_descendant_trait %>%
    mutate(percent = (n/sum(n)) * 100)

percent_descendant_trait <- percent_descendant_trait %>%
    rename(discipline_code = by)

ggplot(percent_descendant_trait, aes(x = descendant_trait_code, y = percent)) + geom_col(aes(fill = discipline_code),
    width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
    theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
    guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used and a description of what the controlled vocabulary means.

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.

Descendant_trait_info_unique <- Descendant_trait_code_info %>%
    select(controlled_vocab_descendant_trait, description)
unique(Descendant_trait_info_unique)
## # A tibble: 9 x 2
##   controlled_vocab_descendan… description                                       
##   <chr>                       <chr>                                             
## 1 physiological               physiological (e.g., immune function, insulin lev…
## 2 morphological               morphological (e.g., body size, adiposity, colour…
## 3 reproductive                reproductive (e.g., fecundity and sexual trait me…
## 4 life history                life-history (e.g., developmental rate, aging and…
## 5 survival                    descendant survival/aging (must be post-natally)  
## 6 behavioural                 behavioural (e.g., response to stimuli, anxiety, …
## 7 molecular                   molecular (e.g., gene expression, DNA methylation…
## 8 health                      health and disease (e.g., disease prevalence )    
## 9 unclear                     other/unclear

Descendant sex within and between disciplines

merged_descendant_sex <- merge(Descendant_sex_info, Review_info)

count_descendant_sex <- merged_descendant_sex %>%
    count(descendant_sex_code, by = discipline_code) %>%
    arrange(desc(n))
percent_descendant_sex <- count_descendant_sex %>%
    mutate(percent = (n/sum(n)) * 100)

percent_descendant_sex <- percent_descendant_sex %>%
    rename(discipline_code = by)

ggplot(percent_descendant_sex, aes(x = descendant_sex_code, y = percent)) + geom_col(aes(fill = discipline_code),
    width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
    theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
    guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used and a description of what the controlled vocabulary means.

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.

Descendant_sex_info_unique <- Descendant_sex_code_info %>%
    select(controlled_vocab_descendant_sex, description)
unique(Descendant_sex_info_unique)
## # A tibble: 3 x 2
##   controlled_vocab_descendant_sex description
##   <chr>                           <chr>      
## 1 males                           males      
## 2 females                         females    
## 3 unclear                         unclear

Descendant age within and between disciplines

Note: We excluded SRs that solely focused on fetal traits in descendants. However, some broader SRs included primary studies that mearured fetal traits. This was therefore coeded in our data.

merged_descendant_age <- merge(Descendant_age_info, Review_info)

count_descendant_age <- merged_descendant_age %>%
    count(descendant_age_code, by = discipline_code) %>%
    arrange(desc(n))
percent_descendant_age <- count_descendant_age %>%
    mutate(percent = (n/sum(n)) * 100)

percent_descendant_age <- percent_descendant_age %>%
    rename(discipline_code = by)

ggplot(percent_descendant_age, aes(x = descendant_age_code, y = percent)) + geom_col(aes(fill = discipline_code),
    width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
    theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
    guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used and a description of what the controlled vocabulary means.

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.

Descendant_age_info_unique <- Descendant_age_code_info %>%
    select(controlled_vocab_descendant_age, description)
unique(Descendant_age_info_unique)
## # A tibble: 5 x 2
##   controlled_vocab_descendant_… description                                     
##   <chr>                         <chr>                                           
## 1 fetal                         fetal/embryonic                                 
## 2 juvenile                      juvenile (post-embryonic/birth prior to sexual …
## 3 adult                         adult                                           
## 4 ongoing                       ongoing                                         
## 5 unclear                       other/unclear

Higher taxononmic groups within and between disciplines

Merged_higher_taxon <- merge(Higher_taxon_info, Review_info)

count_higher_taxon <- Merged_higher_taxon %>%
    count(taxon_code, by = discipline_code) %>%
    arrange(desc(n))
percent_higher_taxon <- count_higher_taxon %>%
    mutate(percent = (n/sum(n)) * 100)

percent_higher_taxon <- percent_higher_taxon %>%
    rename(discipline_code = by)

ggplot(percent_higher_taxon, aes(x = taxon_code, y = percent)) + geom_col(aes(fill = discipline_code),
    width = 0.7) + theme_light() + coord_flip() + scale_y_continuous(name = "Percent") +
    theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) +
    guides(fill = guide_legend(title = "Discipline:"))

Table showing the controlled vacabularly used and a description of what the controlled vocabulary means.

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.

Higher_taxon_info_unique <- Taxon_code_info %>%
    select(controlled_vocab_taxon, description)
unique(Higher_taxon_info_unique)
## # A tibble: 5 x 2
##   controlled_vocab_taxon description  
##   <chr>                  <chr>        
## 1 vertebrates            vertebrates  
## 2 invertebrates          invertebrates
## 3 plants                 plants       
## 4 other                  other        
## 5 unclear                unclear

Descendent generation vs terminology use across disciplines

Does the terminology used (i.e., inter- vs trans-generational) match our definition (based on generation, sex and taxa) within the descendant generations examined

generat_terminology <- merge(Descendant_generat_info, Review_info)

count_generat_terminology <- generat_terminology %>%
    count(descendant_generat_code, by = terminology_code) %>%
    arrange(desc(n))
percent_generat_terminology <- count_generat_terminology %>%
    mutate(percent = (n/sum(n)) * 100)

percent_generat_terminology <- percent_generat_terminology %>%
    rename(terminology_code = by)

ggplot(percent_generat_terminology, aes(x = descendant_generat_code, y = percent)) +
    geom_col(aes(fill = terminology_code), width = 0.7) + theme_light() + coord_flip() +
    scale_y_continuous(name = "Percent") + theme(legend.position = "bottom", axis.title.x = element_text(size = 10),
    axis.title.y = element_blank()) + guides(fill = guide_legend(title = "Terminology:"))

Time trend of the number of SRs per year

Publication_info %>%
    count(year) %>%
    ggplot(aes(x = year, y = n)) + geom_area(fill = "#919191", alpha = 1) + geom_line(color = "skyblue",
    size = 1) + geom_point(size = 1, color = "blue") + theme_minimal() + scale_x_continuous(name = "",
    limits = c(2005, 2020)) + scale_y_continuous(name = "Article count", limits = c(0,
    10)) + ggtitle("Publication year") + theme(plot.title = element_text(hjust = 0.5))

Adressing objective 3

  1. Conduct bibliometric analyses of co-author networks and common terminology use across and within disciplines.

The following visualisations allow us to view bibliometric patterns from data exported directly from scopus. We are able to view common keyword use, author networks, affiliations, and citation patterns.

Author, country, and citation summary plots

results1 <- biblioAnalysis(bib)  #run basic standard descriptive analysis of the dataset (data frame)
summary(results1, k = 7, pause = F, width = 130)  #produces a sequence of standard summary tables displayed in the console
plot(results1, k = 7, pause = F)

Keyword matrix plot

NetMatrix_keywords <- biblioNetwork(bib, analysis = "co-occurrences", network = "keywords", sep = ";")
NetMatrix_keywords_plot <- networkPlot(NetMatrix_keywords, normalize = "association", n = 10, Title = "Keyword co-occurrences", type = "fruchterman",
    size.cex = TRUE, size = 30, remove.multiple = F, edgesize = 10, labelsize = 3, label.cex = TRUE, edges.min = 2, cluster = "edge_betweenness")

Thematic map

map_thematic <- thematicMap(bib, field = "ID", n = 1000, minfreq = 5, stemming = FALSE, size = 0.5, n.labels = 1, repel = TRUE)
plot(map_thematic$map)

Author collaboration network

NetMatrix_authors <- biblioNetwork(bib, analysis = "collaboration", network = "authors", sep = ";")
NetMatrix_authors_plot <- networkPlot(NetMatrix_authors, n = 20, Title = "Author collaboration", type = "auto", size = 15, size.cex = TRUE,
    edgesize = 10, labelsize = 1)

Country collaboration network

bib_sco2 <- metaTagExtraction(bib, Field = "AU_CO", sep = ";")  #we need to extract countries from the affiliations first
# bib_sco2$AU_CO[1:10]
NetMatrix_country <- biblioNetwork(bib_sco2, analysis = "collaboration", network = "countries", sep = ";")
NetMatrix_country_plot <- networkPlot(NetMatrix_country, n = 50, Title = "Country collaboration", type = "auto", size = TRUE, remove.multiple = FALSE,
    labelsize = 1.5)

Addressing objective 4

  1. Conduct a critical appraisal of the SR literature to assess the rigour, transparency, and risk of bias.

The following visualisations allow us to see the average scores across each CEESAT questions as well as how each individual SR scored for each CEESAT questions. This will allow us to assess the quality and Risk of Bias of the SR literature.

Note: CEESAT questions and scoring criteria can be found in Appendix_S3 on the open Science Framework https://osf.io/detvk/.

Blinding paper ID and wrangling data into long format

Paper ID was blinded for the pilot but will not be blinded for the full study

# blinding authors
Assessment$id <- paste("ID", c(1:length(Assessment$id)), sep = "")
# shortening column names
names(Assessment) <- gsub("CEESAT_", "", names(Assessment), fixed = TRUE)
# selecting only the columns with scores selecting only the columns with scores
Assessment_reduced <- select(Assessment, c("id", !ends_with("_comment")))
# wrangling data into long format
ceesat_long <- gather(Assessment_reduced, question, score, Q1.1:Q8.1, factor_key = TRUE)

CEESAT score summary across SRs

This plot shows the average CEESAT score per question. CEESAT questions and scoring criteria can be found in Appendix_S3 on the open Science Framework https://osf.io/detvk/.

# calculating the % of scores within each questions
count_ceesat_score <- ceesat_long %>%
    count(score, by = question)
percent_ceesat_score <- count_ceesat_score %>%
    mutate(percent = (n/sum(n)) * 100)
percent_ceesat_score <- percent_ceesat_score %>%
    rename(question = by)
percent_ceesat_score$question <- as.factor(percent_ceesat_score$question)
percent_ceesat_score$question <- factor(percent_ceesat_score$question, levels(percent_ceesat_score$question)[length(percent_ceesat_score$question):1])  #reverse the order of questions
percent_ceesat_score$score <- as.factor(percent_ceesat_score$score)
percent_ceesat_score$score <- factor(percent_ceesat_score$score, levels(percent_ceesat_score$score)[c(2, 3, 1, 4)])  #set the order of levels for assessment scores:
summaryplot <- ggplot(data = percent_ceesat_score, x = question, y = percent) + geom_col(mapping = aes(x = question, y = percent, fill = score),
    width = 0.7, position = "fill", color = "black") + coord_flip(ylim = c(0, 1)) + guides(fill = guide_legend(reverse = TRUE)) + scale_fill_manual(values = c("yellow",
    "green", "orange", "red")) + theme(legend.position = "bottom", panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
    panel.background = element_blank()) + ylab("Percent") + xlab("CEESAT question") + guides(fill = guide_legend(title = "Score:"))
summaryplot

Individual CEESAT Scores

This plot shows the CEESAT score per SR. CEESAT questions and scoring criteria can be found in Appendix_S3 on the open Science Framework https://osf.io/detvk/.

scoresplot <- ggplot(data = ceesat_long, aes(y = id, x = question)) + geom_tile(color = "black", fill = "white", size = 0.8) + geom_point(aes(color = as.factor(score)),
    size = 5) + scale_x_discrete(position = "top") + guides(color = guide_legend(reverse = TRUE)) + scale_color_manual(values = c("orange",
    "yellow", "green", "red"), name = "Score:") + theme_minimal() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
    panel.background = element_blank(), legend.position = "bottom", legend.background = element_rect(linetype = "solid", colour = "grey"),
    legend.key.size = unit(0.75, "cm"), legend.text = element_text(size = 12), axis.text.y = element_text(size = 10, color = "black"),
    axis.text.x = element_text(angle = 45, hjust = 0), plot.margin = unit(c(1, 1, 1, 0), "cm")) + ylab("Study ID") + xlab("CEESAT question")
scoresplot

---
title: "Nongenetic inhertance map of systematic reviews"
author: "Erin Macartney, Szymek Drobniak, Shinichi Nakagawa, Malgorzata Lagisz"
output: 
    
    rmdformats::readthedown:
      code_folding: hide
      code_download: true
      toc_depth: 4
editor_options: 
  chunk_output_type: console
---
    
```{r, include = FALSE}
knitr::opts_chunk$set(
message = FALSE,
warning = FALSE,
cache = TRUE, 
tidy = TRUE, 
echo = TRUE
)

rm(list = ls())
```


```{r setup, results = 'hide'}

knitr::opts_chunk$set(echo = TRUE)

library(readxl)
library(plyr)
library(tibble)
library(dplyr)
library(tidyverse)
library(stringr)
library(knitr)
library(forcats)
library(ggplot2)
library(hrbrthemes) #for ggplot2
library(bibliometrix)
library(igraph)

```
### Data loading

``` {r, results = 'hide'}
#manually extracted data
xldata <- "./Data_extraction_postcrosschecking.xlsx"

#bibliometric data
bib <- convert2df("./bibliometric.bib", dbsource = "scopus", format = "bibtex")
```


### Data organisation
Splitting list of tabs into separate dataframes

``` {r, results = 'hide'}

excel_sheets(path = xldata)
tab_names <- excel_sheets(path = xldata)


#creating a list of dataframes per tab
list_all <- lapply(tab_names, function(x) read_excel(path = xldata, sheet = x))

#assigning tab names to each dataframe
names(list_all) <- tab_names

#get dataframes out of list
list2env(list_all, .GlobalEnv)
```

## Addressing objectives 1 and 2 {.tabset}

1)	Map the SR literature across disciplines e.g., the types of environmental exposures and traits synthesised, the proportion of SRs that examine inter- versus trans-generational effects, and which disciplines dominate the non-genetic inheritance SR literature. The map will highlight gaps in the literature that remain to be synthesised or have very few SRs.

2)	Present discipline-specific research patterns by summarising commonalities and disparities between disciplines (e.g., do SRs of specific environmental exposures dominate one discipline and not others? Do some disciplines focus on inter-generational effects, and another on trans-generational effects?).

### Percent of SR's within disciplines

``` {r}

count_discipline <-Review_info %>% count(discipline_code) %>% arrange(desc(n)) 
percent_discipline <- count_discipline %>% mutate(percent = (n/sum(n))*100)
percent_discipline$discipline_code <- factor(percent_discipline$discipline_code, level = percent_discipline$discipline_code[order(percent_discipline$n, decreasing = FALSE)])

ggplot(percent_discipline, aes(x = discipline_code, y = percent)) + 
  geom_col(aes(fill = ""), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  xlab("Discipline") + 
  scale_fill_manual(values = c("#919191")) +
  theme(legend.position = "none", axis.title.x = element_text(size = 10))

```

### Inter- vs trans-generational effects within and between disciplines

```{r}

Merged_inter_vs_trans <- merge(Inter_vs_trans_info, Review_info)

count_inter_vs_trans <- Merged_inter_vs_trans %>% count(inter_vs_trans_code, by = discipline_code ) %>% arrange(desc(n))
percent_inter_vs_trans <- count_inter_vs_trans %>% mutate(percent = (n/sum(n))*100)

percent_inter_vs_trans <-percent_inter_vs_trans %>%
  rename(
    discipline_code = by
  )

ggplot(percent_inter_vs_trans, aes(x = inter_vs_trans_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
Inter_vs_trans_info_unique <- Inter_vs_trans_code_info %>% 
  select(
  controlled_vocab_inter_vs_trans, description
  ) 
unique(Inter_vs_trans_info_unique)
```

### Descendant generations within and between disciplines

```{r}

merged_descendant_generat <- merge(Descendant_generat_info, Review_info)

count_descendant_generat <- merged_descendant_generat %>% count(descendant_generat_code, by = discipline_code) %>% arrange(desc(n))
percent_descendant_generat <- count_descendant_generat %>% mutate(percent = (n/sum(n))*100)

percent_descendant_generat <- percent_descendant_generat %>% 
  rename (
    discipline_code = by
  )

ggplot(percent_descendant_generat, aes(x = descendant_generat_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

### Terminology used within and between disciplines
I.e., does the use of inter- and trans-generational inheritance match our definitions (see Fig. 1)

``` {r}

count_terminology <- Review_info %>% count(terminology_code, by = discipline_code) %>% arrange(desc(n))
percent_terminology <- count_terminology %>% mutate(percent = (n/sum(n))*100)

percent_terminology<- percent_terminology %>%
  rename(
    discipline_code = by
  )

ggplot(percent_terminology, aes(x = terminology_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() +  
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
terminology_code_unique <- Terminology_code_info %>% 
  select(
    controlled_vocab_terminology, description_terminology
  )
unique(terminology_code_unique)
```

### Types of non-genetic transmission within and between disciplines
I.e., are the non-genetic effects conferred through the matriline or patriline etc.

```{r}

Merged_transmission_info <- merge(Transmission_info, Review_info)

count_transmission <- Merged_transmission_info %>%  count(transmission_code, by = discipline_code) %>% arrange(desc(n))
percent_transmission <- count_transmission %>% mutate(percent = (n/sum(n))*100)

percent_transmission <-percent_transmission %>%
  rename(
    discipline_code = by
  )

ggplot(percent_transmission, aes(x = transmission_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
Transmission_info_unique <- Transmission_code_info %>% 
  select(
    controlled_vocab_transmission, description
  )
unique(Transmission_info_unique) 

```

### F0 environmental manipulations within and between disciplines

```{r}

merged_F0_env <- merge(F0_env_info, Review_info)

count_F0_env<- merged_F0_env %>% count(F0_env_code, by = discipline_code ) %>% arrange(desc(n))
percent_F0_env <- count_F0_env %>% mutate(percent = (n/sum(n))*100)
 
percent_F0_env <-percent_F0_env %>%
  rename(
    discipline_code = by
  )

ggplot(percent_F0_env, aes(x = F0_env_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
F0_env_info_unique <- F0_env_code_info %>% 
  select(
    controlled_vocab_F0_env, description
  )
unique(F0_env_info_unique)
```

### Environmental effect direction within and between disciplines
I.e., are the effects of environment predicted to have negative, positive, or neutral effects on offspring phenotype

```{r}

merged_env_eff <- merge(Env_eff_diection_info, Review_info)

count_env_eff <- merged_env_eff %>% count(env_eff_direction_code, by = discipline_code) %>% arrange(desc(n))
percent_env_eff <- count_env_eff %>% mutate(percent = (n/sum(n))*100)

percent_env_eff <-percent_env_eff %>%
  rename(
    discipline_code = by
  )

ggplot(percent_env_eff, aes(x = env_eff_direction_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
Env_eff_direction_info_unique <- Env_eff_direction_code_info %>% 
  select(
    controlled_vocab_env_eff_direction, description
  )
unique(Env_eff_direction_info_unique) 
```

### F0 environmental exposure timing within and between disciplines
**Note:** We excluded SRs that solely focused on environmental exposures that occured when the F0 generation was a fetus (i.e., pre-natal). However,  some broader SRs (i.e., eco evo SRs) included primary studies where the F0 generation was exposed pre-natally. This was therefore coded in our data. 

```{r}

merged_exposure_timing <- merge(Exposure_timing_info, Review_info)

count_exposure_timing <- merged_exposure_timing %>% count(exposure_timing_code, by = discipline_code) %>% arrange(desc(n))
percent_exposure_timing <- count_exposure_timing %>% mutate(percent = (n/sum(n))*100)

percent_exposure_timing <-percent_exposure_timing %>%
  rename (
    discipline_code = by
  )

ggplot(percent_exposure_timing, aes(x = exposure_timing_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used, and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
Exposure_timing_info_unique<- Exposure_timing_code_info %>% 
  select(
    controlled_vocab_exposure_timing, description
  )
unique(Exposure_timing_info_unique) 
```

### Descendant traits within and between disciplines 

```{r}

merged_descendant_trait <- merge(Descendant_trait_info, Review_info)

count_descendant_trait <- merged_descendant_trait %>% count(descendant_trait_code, by = discipline_code) %>% arrange(desc(n))
percent_descendant_trait <- count_descendant_trait %>% mutate(percent = (n/sum(n))*100)

percent_descendant_trait <- percent_descendant_trait %>%
  rename(
    discipline_code = by
  )

ggplot(percent_descendant_trait, aes(x = descendant_trait_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() + 
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
Descendant_trait_info_unique<-Descendant_trait_code_info %>% 
  select(
     controlled_vocab_descendant_trait,description
  )
unique(Descendant_trait_info_unique) 
```

### Descendant sex within and between disciplines

```{r}

merged_descendant_sex <- merge(Descendant_sex_info, Review_info)

count_descendant_sex <- merged_descendant_sex %>% count(descendant_sex_code, by = discipline_code) %>% arrange(desc(n))
percent_descendant_sex <- count_descendant_sex  %>% mutate(percent = (n/sum(n))*100)

percent_descendant_sex <- percent_descendant_sex %>%
  rename(
    discipline_code = by
           )

ggplot(percent_descendant_sex, aes(x = descendant_sex_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
Descendant_sex_info_unique<- Descendant_sex_code_info%>% 
  select(
    controlled_vocab_descendant_sex, description
  )
unique(Descendant_sex_info_unique)
```

### Descendant age within and between disciplines
**Note:** We excluded SRs that solely focused on fetal traits in descendants. However, some broader SRs included primary studies that mearured fetal traits. This was therefore coeded in our data. 

```{r}

merged_descendant_age <- merge(Descendant_age_info, Review_info)

count_descendant_age <- merged_descendant_age %>% count(descendant_age_code, by = discipline_code) %>% arrange(desc(n))
percent_descendant_age <- count_descendant_age %>% mutate(percent = (n/sum(n))*100)

percent_descendant_age <- percent_descendant_age %>%
  rename(
    discipline_code = by
  )

ggplot(percent_descendant_age, aes(x = descendant_age_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
Descendant_age_info_unique<- Descendant_age_code_info %>% 
  select(
    controlled_vocab_descendant_age, description
  )
unique(Descendant_age_info_unique) 
```

### Higher taxononmic groups within and between disciplines 

``` {r}

Merged_higher_taxon <- merge(Higher_taxon_info, Review_info)

count_higher_taxon <- Merged_higher_taxon %>% count(taxon_code, by = discipline_code) %>% arrange(desc(n))
percent_higher_taxon <- count_higher_taxon %>% mutate(percent = (n/sum(n))*100)

percent_higher_taxon<-percent_higher_taxon %>%
  rename(
    discipline_code = by
  )

ggplot(percent_higher_taxon, aes(x = taxon_code, y = percent)) + 
  geom_col(aes(fill = discipline_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Discipline:"))

```

#### Table showing the controlled vacabularly used and a description of what the controlled vocabulary means. 

Note that the description often equals controlled vocab but this is not always the case for more complicated elements.
``` {r}
Higher_taxon_info_unique<- Taxon_code_info %>% 
  select(
    controlled_vocab_taxon, description
  )
unique(Higher_taxon_info_unique)
```

### Descendent generation vs terminology use across disciplines
Does the terminology used (i.e., inter- vs trans-generational) match our definition (based on generation, sex and taxa) within the  descendant generations examined

``` {r}

generat_terminology <- merge(Descendant_generat_info, Review_info)
  
count_generat_terminology <- generat_terminology %>% count(descendant_generat_code, by = terminology_code) %>% arrange(desc(n))
percent_generat_terminology <- count_generat_terminology %>% mutate(percent = (n/sum(n))*100)

percent_generat_terminology<-percent_generat_terminology %>%
  rename(
    terminology_code = by
  )

ggplot(percent_generat_terminology, aes(x = descendant_generat_code, y = percent)) + 
  geom_col(aes(fill = terminology_code), width = 0.7) + 
  theme_light() +
  coord_flip() + 
  scale_y_continuous(name = "Percent") +
  theme(legend.position = "bottom", axis.title.x = element_text(size = 10), axis.title.y = element_blank()) + 
  guides(fill=guide_legend(title="Terminology:"))
```

### Time trend of the number of SRs per year

``` {r}

Publication_info %>% count(year) %>% ggplot(aes(x = year, y = n)) + 
  geom_area(fill = '#919191', alpha = 1) +
  geom_line(color = 'skyblue', size = 1) + 
  geom_point(size=1, color = 'blue') +
  theme_minimal() +
  scale_x_continuous(name = "", limits = c(2005, 2020)) +
  scale_y_continuous(name = "Article count", limits = c(0, 10)) +
  ggtitle("Publication year") + 
  theme(plot.title = element_text(hjust = 0.5))

```

## Adressing objective 3 {.tabset}

3)	Conduct bibliometric analyses of co-author networks and common terminology use across and within disciplines. 

The following visualisations allow us to view bibliometric patterns from data exported directly from scopus. We are able to view common keyword use, author networks, affiliations, and citation patterns. 

### Author, country, and citation summary plots

``` {r, message = FALSE, warning = FALSE, results = 'hide'}
results1 <- biblioAnalysis(bib) #run basic standard descriptive analysis of the dataset (data frame)
summary(results1, k = 7, pause = F, width = 130) #produces a sequence of standard summary tables displayed in the console
plot(results1, k = 7, pause = F)
```

### Keyword matrix plot

``` {r, message = FALSE, warning = FALSE}
NetMatrix_keywords <- biblioNetwork(bib, analysis = "co-occurrences", network = "keywords", sep = ";")
NetMatrix_keywords_plot <- networkPlot(NetMatrix_keywords, normalize="association", n = 10, Title = "Keyword co-occurrences", type = "fruchterman", size.cex = TRUE, size = 30, remove.multiple = F, edgesize = 10, labelsize = 3, label.cex = TRUE,edges.min = 2, cluster = "edge_betweenness")
```

### Thematic map

``` {r, message = FALSE, warning = FALSE}
map_thematic <- thematicMap(bib, field = "ID", n = 1000, minfreq = 5, stemming = FALSE, size = 0.5, n.labels = 1, repel = TRUE)
plot(map_thematic$map)
```

### Author collaboration network

``` {r, message = FALSE, warning = FALSE}
NetMatrix_authors <- biblioNetwork(bib, analysis = "collaboration",  network = "authors", sep = ";")
NetMatrix_authors_plot <- networkPlot(NetMatrix_authors,  n = 20, Title = "Author collaboration", type = "auto", size = 15, size.cex = TRUE, edgesize = 10, labelsize = 1) 
```

### Country collaboration network

```{r, message = FALSE, warning = FALSE}
bib_sco2 <- metaTagExtraction(bib, Field = "AU_CO", sep = ";") #we need to extract countries from the affiliations first
#bib_sco2$AU_CO[1:10]
NetMatrix_country <- biblioNetwork(bib_sco2, analysis = "collaboration", network = "countries", sep = ";")
NetMatrix_country_plot <-  networkPlot(NetMatrix_country, n = 50, Title = "Country collaboration", type = "auto", size=TRUE, remove.multiple=FALSE, labelsize=1.5)
```

## Addressing objective 4

4)	Conduct a critical appraisal of the SR literature to assess the rigour, transparency, and risk of bias. 

The following visualisations allow us to see the average scores across each CEESAT questions as well as how each individual SR scored for each CEESAT questions. This will allow us to assess the quality and Risk of Bias of the SR literature. 

**Note:** CEESAT questions and scoring criteria can be found in Appendix_S3 on the open Science Framework https://osf.io/detvk/.

### Blinding paper ID and wrangling data into long format

Paper ID was blinded for the pilot but will not be blinded for the full study

```{r}
#blinding authors
Assessment$id <- paste("ID", c(1:length(Assessment$id)), sep = "")
#shortening column names
names(Assessment) <- gsub("CEESAT_", "", names(Assessment), fixed = TRUE)
#selecting only the columns with scores
#selecting only the columns with scores
Assessment_reduced <- select(Assessment, c("id", !ends_with("_comment")))
#wrangling data into long format
ceesat_long <- gather(Assessment_reduced, question, score, Q1.1:Q8.1, factor_key=TRUE)
```

### CEESAT score summary across SRs
This plot shows the average CEESAT score per question. 
CEESAT questions and scoring criteria can be found in Appendix_S3 on the open Science Framework https://osf.io/detvk/.

``` {r}
#calculating the % of scores within each questions 
count_ceesat_score <- ceesat_long %>% count(score, by = question) 
percent_ceesat_score <- count_ceesat_score %>% mutate(percent = (n/sum(n))*100)
percent_ceesat_score <- percent_ceesat_score %>%
  rename(
    question = by
  )
percent_ceesat_score$question <- as.factor(percent_ceesat_score$question)
percent_ceesat_score$question <- factor(percent_ceesat_score$question, levels(percent_ceesat_score$question)[length(percent_ceesat_score$question):1]) #reverse the order of questions
percent_ceesat_score$score <- as.factor(percent_ceesat_score$score)
percent_ceesat_score$score <- factor(percent_ceesat_score$score, levels(percent_ceesat_score$score)[c(2,3,1,4)]) #set the order of levels for assessment scores:
summaryplot <- ggplot(data = percent_ceesat_score, x = question, y = percent) +
  geom_col(mapping = aes(x = question, y = percent, fill = score), width = 0.7,
           position = "fill", color = "black") +
  coord_flip(ylim = c(0, 1)) +
  guides(fill = guide_legend(reverse = TRUE)) +
  scale_fill_manual(values = c("yellow","green", "orange","red")) +
  theme(legend.position = "bottom", panel.grid.major = element_blank(),panel.grid.minor = element_blank(),panel.background = element_blank()) + 
  ylab("Percent") + xlab("CEESAT question") +
  guides(fill=guide_legend(title="Score:")) 
summaryplot
```

### Individual CEESAT Scores 
This plot shows the CEESAT score per SR.
CEESAT questions and scoring criteria can be found in Appendix_S3 on the open Science Framework https://osf.io/detvk/.

``` {r}
scoresplot <- ggplot(data = ceesat_long, aes(y = id, x = question)) +
  geom_tile(color="black", fill="white", size = 0.8) +
  geom_point(aes(color = as.factor(score)), size = 5) +
  scale_x_discrete(position = "top") +
  guides(color = guide_legend(reverse = TRUE)) +
  scale_color_manual(values = c("orange","yellow","green","red" ), name = "Score:") + 
  theme_minimal() + 
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        legend.position = "bottom",
        legend.background = element_rect(linetype = "solid", colour = "grey"),
        legend.key.size = unit(0.75, "cm"),
        legend.text = element_text(size = 12),
        axis.text.y = element_text(size = 10, color = "black"),
        axis.text.x = element_text(angle = 45, hjust=0),
        plot.margin = unit(c(1,1,1,0), "cm")
  ) +
  ylab("Study ID") + 
  xlab("CEESAT question") 
scoresplot
```